In [1]:
import pandas as pd
import plotly.express as px
import numpy as np
import matplotlib.pyplot as plt
import os
from mpl_toolkits import mplot3d
from plotly.offline import download_plotlyjs, init_notebook_mode
from plotly.offline import plot, iplot
import plotly.graph_objects as go
%matplotlib inline
In [2]:
#Create a path to where your data is stored.
path = r'C:\Users\jinu5\Desktop\careerfoundry\ML\DataSet'
In [3]:
#Read in the European weather data.
climate = pd.read_csv(os.path.join(path, 'prepared data', 'df_scaled.csv'))
climate
Out[3]:
| DATE | MONTH | BASEL_cloud_cover | BASEL_wind_speed | BASEL_humidity | BASEL_pressure | BASEL_global_radiation | BASEL_precipitation | BASEL_snow_depth | BASEL_sunshine | ... | VALENTIA_cloud_cover | VALENTIA_humidity | VALENTIA_pressure | VALENTIA_global_radiation | VALENTIA_precipitation | VALENTIA_snow_depth | VALENTIA_sunshine | VALENTIA_temp_mean | VALENTIA_temp_min | VALENTIA_temp_max | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 19600101 | 1 | 0.660514 | -0.02793 | 0.826097 | -0.001949 | -1.101066 | -0.265148 | -0.179228 | -0.902918 | ... | -0.443701 | 0.761754 | -1.299744 | -0.806427 | -0.088407 | -0.024706 | 0.372147 | -0.668215 | -0.519743 | -0.752237 |
| 1 | 19600102 | 1 | 0.244897 | -0.02793 | 0.735760 | -0.001949 | -1.058108 | 1.658760 | -0.179228 | -0.810126 | ... | 0.783085 | 1.183580 | -1.262455 | -1.042055 | 0.503361 | -0.024706 | -0.829285 | -0.548046 | -0.629054 | -0.407141 |
| 2 | 19600103 | 1 | 1.076130 | -0.02793 | 1.277781 | -0.001949 | -1.251420 | 0.155707 | -0.179228 | -1.065304 | ... | 0.783085 | 1.183580 | -0.432779 | -1.136306 | -0.396127 | -0.024706 | -1.009500 | -0.067372 | 0.054135 | -0.177078 |
| 3 | 19600104 | 1 | -1.001953 | -0.02793 | 1.458455 | -0.001949 | -0.821838 | -0.445514 | -0.179228 | -0.114186 | ... | 0.783085 | 0.480538 | 0.387574 | -1.183432 | 0.669056 | -0.024706 | -1.039536 | -0.998679 | -0.164486 | -0.838511 |
| 4 | 19600105 | 1 | 0.244897 | -0.02793 | 1.729466 | -0.001949 | -0.746661 | -0.164944 | -0.179228 | 0.187388 | ... | -1.670486 | -0.363113 | 1.729970 | -0.794645 | -0.490810 | -0.024706 | 0.672505 | -1.509396 | -1.339569 | -1.471186 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 22945 | 20221027 | 10 | -1.833187 | -0.02793 | 0.284075 | 1.037367 | -0.005632 | -0.004619 | -0.179228 | 0.720943 | ... | -0.443701 | -0.081896 | -0.003958 | -0.005290 | -0.005560 | -0.024706 | -0.018319 | -0.007287 | -0.000520 | -0.004530 |
| 22946 | 20221028 | 10 | 0.244897 | -0.02793 | 0.103401 | 0.976231 | -0.005632 | -0.004619 | -0.179228 | 0.187388 | ... | -0.443701 | -0.081896 | -0.003958 | -0.005290 | -0.005560 | -0.024706 | -0.018319 | -0.007287 | -0.000520 | -0.004530 |
| 22947 | 20221029 | 10 | -0.586336 | -0.02793 | 0.013064 | 0.716402 | -0.005632 | -0.004619 | -0.179228 | 0.349774 | ... | -0.443701 | -0.081896 | -0.003958 | -0.005290 | -0.005560 | -0.024706 | -0.018319 | -0.007287 | -0.000520 | -0.004530 |
| 22948 | 20221030 | 10 | -0.170720 | -0.02793 | 0.374412 | 0.487141 | -0.005632 | -0.004619 | -0.179228 | 0.280180 | ... | -0.443701 | -0.081896 | -0.003958 | -0.005290 | -0.005560 | -0.024706 | -0.018319 | -0.007287 | -0.000520 | -0.004530 |
| 22949 | 20221031 | 10 | -0.170720 | -0.02793 | 0.735760 | 0.196744 | -0.005632 | -0.004619 | -0.179228 | -0.322968 | ... | -0.443701 | -0.081896 | -0.003958 | -0.005290 | -0.005560 | -0.024706 | -0.018319 | -0.007287 | -0.000520 | -0.004530 |
22950 rows × 170 columns
In [4]:
#Reduce to just the mean temperatures
df = climate[['DATE', 'MONTH','BASEL_temp_mean',
'BELGRADE_temp_mean',
'BUDAPEST_temp_mean',
'DEBILT_temp_mean',
'DUSSELDORF_temp_mean',
'GDANSK_temp_mean',
'HEATHROW_temp_mean',
'KASSEL_temp_mean',
'LJUBLJANA_temp_mean',
'MAASTRICHT_temp_mean',
'MADRID_temp_mean',
'MUNCHENB_temp_mean',
'OSLO_temp_mean',
'ROMA_temp_mean',
'SONNBLICK_temp_mean',
'STOCKHOLM_temp_mean',
'TOURS_temp_mean',
'VALENTIA_temp_mean']].copy()
In [5]:
df
Out[5]:
| DATE | MONTH | BASEL_temp_mean | BELGRADE_temp_mean | BUDAPEST_temp_mean | DEBILT_temp_mean | DUSSELDORF_temp_mean | GDANSK_temp_mean | HEATHROW_temp_mean | KASSEL_temp_mean | LJUBLJANA_temp_mean | MAASTRICHT_temp_mean | MADRID_temp_mean | MUNCHENB_temp_mean | OSLO_temp_mean | ROMA_temp_mean | SONNBLICK_temp_mean | STOCKHOLM_temp_mean | TOURS_temp_mean | VALENTIA_temp_mean | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 19600101 | 1 | -0.528623 | -1.016876 | -1.099163 | -0.114356 | -0.105836 | -0.927601 | -0.106469 | -0.182904 | -1.370824 | -0.097084 | -0.988280 | -0.265742 | -0.186575 | -1.280450 | -0.124331 | -0.391072 | -0.257321 | -0.668215 |
| 1 | 19600102 | 1 | -0.582946 | -1.107669 | -1.110927 | -0.367511 | -0.370915 | -0.825294 | -0.892676 | -0.212437 | -1.043881 | -0.232112 | -0.691740 | -0.353714 | -0.368598 | -0.539569 | -0.650834 | -0.415953 | -0.335759 | -0.548046 |
| 2 | 19600103 | 1 | -0.257010 | -1.084971 | -1.063873 | -0.509912 | -0.532908 | -0.940389 | -0.490837 | -0.389635 | -0.741156 | -0.487164 | -0.853490 | -0.403983 | -0.550620 | -0.876333 | -0.650834 | -0.615003 | -0.210258 | -0.067372 |
| 3 | 19600104 | 1 | -0.555784 | -1.209812 | -1.146217 | -0.525734 | -0.577088 | -1.042696 | -0.316124 | -0.493001 | -0.910682 | -0.472161 | -0.624345 | -0.642763 | -0.417137 | -0.775304 | -0.943336 | -0.764290 | -0.069069 | -0.998679 |
| 4 | 19600105 | 1 | -1.003946 | -1.209812 | -1.087400 | -0.320045 | -0.444548 | -0.978754 | -0.403481 | -0.552067 | -0.862246 | -0.307127 | -0.381721 | -0.906678 | -0.332193 | -0.926848 | -0.621584 | -0.503037 | -0.037694 | -1.509396 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 22945 | 20221027 | 10 | 0.747959 | 0.628750 | -0.005167 | 0.898267 | 1.042841 | 0.440753 | 0.906865 | -0.005705 | 0.481853 | 1.268193 | 0.683127 | 0.664244 | 0.395898 | -0.000746 | 0.826301 | 0.517090 | 1.295755 | -0.007287 |
| 22946 | 20221028 | 10 | 0.856604 | 0.367720 | -0.005167 | 0.945734 | 1.278467 | 0.466330 | 0.802037 | -0.005705 | 0.263891 | 1.313202 | 0.561815 | 0.890456 | 0.541516 | -0.000746 | 1.074927 | 0.641496 | 1.358505 | -0.007287 |
| 22947 | 20221029 | 10 | 0.856604 | 0.083991 | -0.005167 | 0.914089 | 1.101748 | 0.786039 | 0.924336 | -0.005705 | 0.300218 | 1.208181 | 0.548336 | 1.053832 | 0.395898 | -0.000746 | 1.221178 | 0.716139 | 1.405568 | -0.007287 |
| 22948 | 20221030 | 10 | 0.680055 | 0.265577 | -0.005167 | 0.692578 | 0.880848 | 0.376811 | 0.697210 | -0.005705 | 0.397090 | 0.923123 | 0.103526 | 0.689378 | -0.065226 | -0.000746 | 1.235803 | 0.019467 | 0.668249 | -0.007287 |
| 22949 | 20221031 | 10 | 0.422023 | 0.197483 | -0.005167 | 0.439422 | 0.659949 | 0.159409 | 0.435140 | -0.005705 | 0.348654 | 0.773092 | -0.112140 | 0.488300 | 0.335224 | -0.000746 | 0.987177 | 0.293160 | 0.809438 | -0.007287 |
22950 rows × 20 columns
In [ ]:
#You'll need to reduce the dataset to only one year of data. Analyze and pick which year you want to use.
In [6]:
#Drop the DATE and MONTH data as those numbers are not scaled with the rest.
notemp = df.drop(['DATE','MONTH'], axis=1)
In [7]:
#Look at a whisker plot of the data to see variations in temperatures
notemp.boxplot(figsize=(15,15))
plt.xticks(rotation=90)
Out[7]:
(array([ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
18]),
[Text(1, 0, 'BASEL_temp_mean'),
Text(2, 0, 'BELGRADE_temp_mean'),
Text(3, 0, 'BUDAPEST_temp_mean'),
Text(4, 0, 'DEBILT_temp_mean'),
Text(5, 0, 'DUSSELDORF_temp_mean'),
Text(6, 0, 'GDANSK_temp_mean'),
Text(7, 0, 'HEATHROW_temp_mean'),
Text(8, 0, 'KASSEL_temp_mean'),
Text(9, 0, 'LJUBLJANA_temp_mean'),
Text(10, 0, 'MAASTRICHT_temp_mean'),
Text(11, 0, 'MADRID_temp_mean'),
Text(12, 0, 'MUNCHENB_temp_mean'),
Text(13, 0, 'OSLO_temp_mean'),
Text(14, 0, 'ROMA_temp_mean'),
Text(15, 0, 'SONNBLICK_temp_mean'),
Text(16, 0, 'STOCKHOLM_temp_mean'),
Text(17, 0, 'TOURS_temp_mean'),
Text(18, 0, 'VALENTIA_temp_mean')])
In [6]:
#Reduce your dataset to a single year
dfyear = df[df['DATE'].astype(str).str.contains('2021')] #<-----INSERT YEAR HERE
dfyear.head(20)
Out[6]:
| DATE | MONTH | BASEL_temp_mean | BELGRADE_temp_mean | BUDAPEST_temp_mean | DEBILT_temp_mean | DUSSELDORF_temp_mean | GDANSK_temp_mean | HEATHROW_temp_mean | KASSEL_temp_mean | LJUBLJANA_temp_mean | MAASTRICHT_temp_mean | MADRID_temp_mean | MUNCHENB_temp_mean | OSLO_temp_mean | ROMA_temp_mean | SONNBLICK_temp_mean | STOCKHOLM_temp_mean | TOURS_temp_mean | VALENTIA_temp_mean | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 771 | 19620210 | 2 | -1.139752 | -1.289256 | -1.169744 | -0.937112 | -0.974707 | -0.914813 | -1.137274 | -0.965530 | -1.068099 | -1.057280 | -1.338737 | -1.145458 | -0.477811 | -0.707951 | -0.723959 | -0.453275 | -1.261329 | -0.247625 |
| 772 | 19620211 | 2 | -1.139752 | -1.380049 | -1.216797 | -0.921290 | -0.989434 | -0.659045 | -1.049918 | -0.847397 | -1.201298 | -0.892246 | -1.217425 | -1.120324 | -0.635564 | -0.556407 | -0.811710 | -0.615003 | -0.665199 | -0.728299 |
| 773 | 19620212 | 2 | -0.528623 | -0.892036 | -1.122690 | -0.399156 | -0.488728 | -0.710199 | -0.735435 | -0.670199 | -0.934900 | -0.517170 | -0.893927 | -0.177771 | -0.866127 | -0.876333 | -0.738584 | -0.851374 | -1.010327 | -0.548046 |
| 774 | 19620213 | 2 | -1.085430 | -0.369975 | -0.628627 | -1.174445 | -1.372326 | -1.119426 | -1.014975 | -1.187028 | -0.862246 | -1.282325 | -0.826531 | -0.956948 | -0.951071 | -1.213097 | -1.104212 | -1.436081 | -0.602449 | -1.148890 |
| 775 | 19620214 | 2 | -1.465688 | -1.391398 | -1.099163 | -1.316845 | -1.401779 | -1.196156 | -1.539113 | -1.438059 | -1.201298 | -1.492368 | -0.988280 | -1.497345 | -1.120958 | -1.431994 | -2.332720 | -1.510724 | -1.449581 | -1.569480 |
| 776 | 19620215 | 2 | -1.587914 | -1.607032 | -1.416775 | -1.269379 | -1.475413 | -1.439135 | -1.294515 | -1.526658 | -1.322388 | -1.447359 | -1.325258 | -1.748692 | -1.557813 | -2.021331 | -2.873848 | -1.908823 | -1.465269 | -1.269058 |
| 777 | 19620216 | 2 | -0.691591 | -1.709174 | -1.628516 | -0.430800 | -0.518182 | -1.081061 | -0.980032 | -0.611133 | -1.164971 | -0.532173 | -1.298299 | -0.705600 | -0.356463 | -2.156037 | -1.747716 | -0.913576 | -0.806388 | -0.457920 |
| 778 | 19620217 | 2 | -0.990365 | -1.153066 | -0.922712 | -0.889645 | -1.048340 | -1.081061 | -0.875205 | -0.847397 | -0.801701 | -1.057280 | -0.947843 | -1.032352 | -0.671969 | -1.011039 | -1.513715 | -1.560487 | -0.727950 | -0.337751 |
| 779 | 19620218 | 2 | -1.166914 | -1.311954 | -1.146217 | -0.842178 | -0.915801 | -1.157791 | -0.822791 | -0.906464 | -0.813810 | -1.072283 | -0.893927 | -1.208295 | -1.084554 | -1.314127 | -2.157219 | -1.610249 | -1.073078 | -0.698257 |
| 780 | 19620219 | 2 | -1.180494 | -1.334652 | -1.099163 | -0.636489 | -0.797988 | -0.914813 | -0.805320 | -0.581600 | -1.043881 | -0.757219 | -0.705220 | -0.894111 | -0.587025 | -1.583538 | -1.148087 | -1.100185 | -1.041702 | -0.698257 |
| 4423 | 19720210 | 2 | -0.868139 | -0.517514 | -0.816841 | -1.237734 | -0.871621 | -0.876447 | -1.294515 | -0.640666 | -0.922791 | -0.832234 | -0.759136 | -0.881544 | -0.781183 | -0.926848 | -0.475333 | -0.975779 | -1.229954 | -1.659606 |
| 4424 | 19720211 | 2 | -0.610107 | -0.472117 | -0.910949 | -0.842178 | -0.665448 | -0.902024 | -1.154745 | -0.611133 | -1.055990 | -0.667200 | -0.705220 | -0.743302 | -0.829722 | -1.280450 | -0.840960 | -0.913576 | -0.571074 | -1.629564 |
| 4425 | 19720212 | 2 | -0.678010 | -0.415371 | -0.581573 | -0.842178 | -0.753808 | -0.722987 | -1.014975 | -0.640666 | -0.995445 | -0.757219 | -0.866969 | -0.831274 | -0.975340 | -1.078392 | -1.177337 | -0.876255 | -0.790700 | -1.509396 |
| 4426 | 19720213 | 2 | -0.827397 | -0.653703 | -0.558047 | -1.047867 | -0.989434 | -0.978754 | -1.049918 | -0.891697 | -0.741156 | -0.817231 | -0.610866 | -0.994650 | -0.635564 | -1.128906 | -1.308963 | -0.739408 | -0.947577 | -1.509396 |
| 4427 | 19720214 | 2 | -0.922462 | -0.937432 | -0.769788 | -0.968756 | -0.930527 | -0.876447 | -1.014975 | -0.699732 | -0.813810 | -0.937255 | -1.150029 | -0.919246 | -0.732643 | -0.859495 | -1.367464 | -0.739408 | -0.822076 | -1.509396 |
| 4428 | 19720215 | 2 | -0.936043 | -1.164415 | -0.805078 | -1.221912 | -0.694901 | -0.838082 | -1.242102 | -0.729265 | -0.632175 | -0.922252 | -1.163509 | -1.007217 | -0.902531 | -1.078392 | -1.674591 | -0.714527 | -1.104453 | -1.299100 |
| 4429 | 19720216 | 2 | -0.732333 | -0.892036 | -0.875659 | -0.763067 | -0.709628 | -0.889236 | -0.735435 | -0.655433 | -1.225516 | -0.697206 | -1.150029 | -1.195728 | -0.902531 | -1.499347 | -1.133462 | -0.751849 | -0.712262 | -1.239016 |
| 4430 | 19720217 | 2 | -0.922462 | -0.823941 | -0.910949 | -0.668134 | -0.503455 | -0.876447 | -1.014975 | -0.714499 | -0.789592 | -0.637194 | -1.446569 | -1.070054 | -0.914666 | -1.280450 | -0.548458 | -0.751849 | -0.853451 | -1.389227 |
| 4431 | 19720218 | 2 | -0.854559 | -0.767195 | -0.875659 | -1.206090 | -1.077794 | -0.850871 | -1.119803 | -0.965530 | -0.777483 | -0.967261 | -1.271341 | -1.082622 | -1.157363 | -0.455378 | -0.446083 | -0.888695 | -0.759325 | -1.509396 |
| 4432 | 19720219 | 2 | -1.275559 | -0.937432 | -0.816841 | -1.285201 | -1.166153 | -1.017119 | -1.259573 | -0.758798 | -0.753265 | -1.237316 | -1.203946 | -0.592494 | -1.363655 | -0.489054 | -0.548458 | -0.913576 | -1.041702 | -2.170323 |
In [7]:
dfyear = df[df['DATE'].astype(str).str.startswith('2021')]
In [10]:
dfyear.head(20)
Out[10]:
| DATE | MONTH | BASEL_temp_mean | BELGRADE_temp_mean | BUDAPEST_temp_mean | DEBILT_temp_mean | DUSSELDORF_temp_mean | GDANSK_temp_mean | HEATHROW_temp_mean | KASSEL_temp_mean | LJUBLJANA_temp_mean | MAASTRICHT_temp_mean | MADRID_temp_mean | MUNCHENB_temp_mean | OSLO_temp_mean | ROMA_temp_mean | SONNBLICK_temp_mean | STOCKHOLM_temp_mean | TOURS_temp_mean | VALENTIA_temp_mean | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 22281 | 20210101 | 1 | -1.112591 | -0.653703 | -0.005167 | -1.111156 | -1.239786 | -1.272886 | -1.713826 | -0.005705 | -0.995445 | -1.222313 | -1.473528 | -1.107756 | -0.744778 | -0.000746 | -1.206588 | -0.540359 | -1.496644 | -0.007287 |
| 22282 | 20210102 | 1 | -1.221236 | -0.551561 | -0.005167 | -1.016223 | -1.195607 | -1.081061 | -1.504171 | -0.005705 | -0.741156 | -1.282325 | -1.716151 | -1.271132 | -0.671969 | -0.000746 | -0.884835 | -0.702087 | -1.763333 | -0.007287 |
| 22283 | 20210103 | 1 | -1.384204 | -0.415371 | -0.005167 | -1.063690 | -1.136700 | -1.042696 | -1.381872 | -0.005705 | -0.862246 | -1.207310 | -1.608319 | -1.208295 | -0.951071 | -0.000746 | -1.016461 | -0.938458 | -1.465269 | -0.007287 |
| 22284 | 20210104 | 1 | -1.329882 | -0.517514 | -0.005167 | -1.142801 | -1.269240 | -0.940389 | -1.259573 | -0.005705 | -0.886464 | -1.297328 | -1.648756 | -1.245997 | -1.145228 | -0.000746 | -1.191962 | -0.863814 | -1.575082 | -0.007287 |
| 22285 | 20210105 | 1 | -1.384204 | -0.699100 | -0.005167 | -1.126979 | -1.239786 | -0.799717 | -1.399343 | -0.005705 | -0.874355 | -1.267322 | -1.810505 | -1.296267 | -1.582083 | -0.000746 | -1.382089 | -0.975779 | -1.496644 | -0.007287 |
| 22286 | 20210106 | 1 | -1.384204 | -0.619656 | -0.005167 | -1.206090 | -1.180880 | -0.902024 | -1.189688 | -0.005705 | -1.152862 | -1.222313 | -1.823984 | -1.283700 | -1.873319 | -0.000746 | -1.382089 | -1.125066 | -1.402518 | -0.007287 |
| 22287 | 20210107 | 1 | -1.234817 | -0.755846 | -0.005167 | -1.174445 | -1.328146 | -0.991543 | -1.783711 | -0.005705 | -1.164971 | -1.312331 | -1.958775 | -1.132891 | -1.424330 | -0.000746 | -1.703841 | -0.913576 | -1.386830 | -0.007287 |
| 22288 | 20210108 | 1 | -1.397785 | -1.073622 | -0.005167 | -1.411779 | -1.431233 | -0.965966 | -1.906010 | -0.005705 | -1.164971 | -1.387347 | -2.066608 | -1.145458 | -1.339386 | -0.000746 | -1.981718 | -0.838933 | -1.826084 | -0.007287 |
| 22289 | 20210109 | 1 | -1.682979 | -1.402747 | -0.005167 | -1.680757 | -1.490139 | -1.004331 | -1.766240 | -0.005705 | -1.407151 | -1.537377 | -2.093566 | -1.283700 | -1.387925 | -0.000746 | -1.776966 | -0.975779 | -1.888835 | -0.007287 |
| 22290 | 20210110 | 1 | -1.601495 | -1.425445 | -0.005167 | -1.332668 | -1.460686 | -0.953178 | -1.608998 | -0.005705 | -1.395042 | -1.447359 | -1.850942 | -1.736125 | -1.218037 | -0.000746 | -1.747716 | -0.975779 | -1.575082 | -0.007287 |
| 22291 | 20210111 | 1 | -1.764463 | -1.470842 | -0.005167 | -0.794712 | -1.210333 | -1.004331 | -1.207159 | -0.005705 | -1.491914 | -1.207310 | -2.093566 | -1.899501 | -1.460734 | -0.000746 | -1.850092 | -0.975779 | -1.841772 | -0.007287 |
| 22292 | 20210112 | 1 | -1.126172 | -1.504889 | -0.005167 | -0.763067 | -0.842167 | -1.004331 | -0.665550 | -0.005705 | -1.770421 | -0.832234 | -2.470980 | -1.145458 | -1.751971 | -0.000746 | -1.177337 | -1.025542 | -0.884826 | -0.007287 |
| 22293 | 20210113 | 1 | -0.786655 | -1.357351 | -0.005167 | -0.905467 | -1.180880 | -1.170580 | -1.049918 | -0.005705 | -1.576677 | -1.147298 | -2.255315 | -1.057487 | -1.254442 | -0.000746 | -1.557590 | -1.025542 | -0.335759 | -0.007287 |
| 22294 | 20210114 | 1 | -1.139752 | -1.414096 | -0.005167 | -1.395957 | -1.387053 | -1.119426 | -1.049918 | -0.005705 | -1.395042 | -1.342338 | -2.026170 | -1.145458 | -1.521409 | -0.000746 | -1.689216 | -1.411200 | -0.367134 | -0.007287 |
| 22295 | 20210115 | 1 | -1.533592 | -1.391398 | -0.005167 | -1.395957 | -1.490139 | -1.413558 | -1.608998 | -0.005705 | -1.552459 | -1.552380 | -1.958775 | -1.547614 | -1.982533 | -0.000746 | -1.908592 | -1.871501 | -0.994640 | -0.007287 |
| 22296 | 20210116 | 1 | -1.859527 | -1.731872 | -0.005167 | -1.459245 | -1.534319 | -1.503077 | -1.294515 | -0.005705 | -1.782530 | -1.552380 | -1.716151 | -1.484778 | -2.006803 | -0.000746 | -2.157219 | -1.286794 | -1.731958 | -0.007287 |
| 22297 | 20210117 | 1 | -1.316301 | -1.845364 | -0.005167 | -1.047867 | -1.401779 | -2.986526 | -1.032446 | -0.005705 | -1.758312 | -1.102289 | -1.797026 | -1.447075 | -1.618487 | -0.000746 | -1.747716 | -1.149947 | -0.775013 | -0.007287 |
| 22298 | 20210118 | 1 | -1.180494 | -1.788618 | -0.005167 | -0.826356 | -0.871621 | -2.641240 | -0.892676 | -0.005705 | -1.431369 | -0.862240 | -1.540923 | -1.019785 | -1.145228 | -0.000746 | -1.718466 | -1.087745 | -1.277017 | -0.007287 |
| 22299 | 20210119 | 1 | -1.343462 | -1.164415 | -0.005167 | -0.367511 | -0.665448 | -2.027399 | -0.648078 | -0.005705 | -1.237625 | -0.697206 | -1.554402 | -0.906678 | -0.853992 | -0.000746 | -1.162712 | -0.888695 | -0.994640 | -0.007287 |
| 22300 | 20210120 | 1 | -1.003946 | -0.619656 | -0.005167 | -0.161822 | -0.341462 | -0.953178 | -0.246239 | -0.005705 | -0.692720 | -0.172100 | -1.513965 | -0.579927 | -0.696239 | -0.000746 | -0.665459 | -0.801611 | -0.382822 | -0.007287 |
In [13]:
dfyear.describe()
Out[13]:
| DATE | MONTH | BASEL_temp_mean | BELGRADE_temp_mean | BUDAPEST_temp_mean | DEBILT_temp_mean | DUSSELDORF_temp_mean | GDANSK_temp_mean | HEATHROW_temp_mean | KASSEL_temp_mean | LJUBLJANA_temp_mean | MAASTRICHT_temp_mean | MADRID_temp_mean | MUNCHENB_temp_mean | OSLO_temp_mean | ROMA_temp_mean | SONNBLICK_temp_mean | STOCKHOLM_temp_mean | TOURS_temp_mean | VALENTIA_temp_mean | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 3.650000e+02 | 365.000000 | 365.000000 | 365.000000 | 3.650000e+02 | 365.000000 | 365.000000 | 365.000000 | 365.000000 | 3.650000e+02 | 365.000000 | 365.000000 | 365.000000 | 365.000000 | 365.000000 | 3.650000e+02 | 365.000000 | 365.000000 | 365.000000 | 3.650000e+02 |
| mean | 2.021067e+07 | 6.526027 | 0.032909 | 0.128766 | -5.166671e-03 | 0.072520 | 0.013712 | 0.088705 | 0.085045 | -5.705221e-03 | 0.101863 | 0.062647 | 0.089456 | 0.090138 | 0.102767 | -7.461640e-04 | 0.045401 | 0.099155 | 0.116518 | -7.287352e-03 |
| std | 3.454755e+02 | 3.452584 | 0.949455 | 1.005278 | 1.042263e-17 | 0.966594 | 0.975583 | 1.066883 | 1.010767 | 3.039933e-17 | 1.014904 | 0.964474 | 1.009745 | 0.926124 | 1.061531 | 4.777038e-18 | 1.026706 | 1.048349 | 0.985446 | 4.603327e-17 |
| min | 2.021010e+07 | 1.000000 | -2.212624 | -2.049648 | -5.166671e-03 | -2.503513 | -2.668269 | -2.986526 | -2.185550 | -5.705221e-03 | -1.855184 | -2.572588 | -2.470980 | -2.125714 | -2.152421 | -7.461640e-04 | -2.815348 | -2.294481 | -2.281025 | -7.287352e-03 |
| 25% | 2.021040e+07 | 4.000000 | -0.745913 | -0.631005 | -5.166671e-03 | -0.683956 | -0.680175 | -0.748564 | -0.683021 | -5.705221e-03 | -0.704829 | -0.667200 | -0.664782 | -0.667898 | -0.574890 | -7.461640e-04 | -0.636209 | -0.577681 | -0.649512 | -7.287352e-03 |
| 50% | 2.021070e+07 | 7.000000 | -0.012558 | -0.040850 | -5.166671e-03 | 0.043867 | -0.061656 | -0.006839 | -0.001641 | -5.705221e-03 | -0.087270 | -0.022069 | -0.085181 | 0.035875 | 0.068257 | -7.461640e-04 | 0.036546 | 0.019467 | 0.056432 | -7.287352e-03 |
| 75% | 2.021100e+07 | 10.000000 | 0.883765 | 0.935177 | -5.166671e-03 | 0.993200 | 0.880848 | 1.029017 | 0.994221 | -5.705221e-03 | 0.941995 | 0.923123 | 0.817918 | 0.865322 | 1.063315 | -7.461640e-04 | 0.855551 | 0.940070 | 0.966314 | -7.287352e-03 |
| max | 2.021123e+07 | 12.000000 | 2.078863 | 2.263027 | -5.166671e-03 | 2.211512 | 2.559684 | 2.320641 | 2.391923 | -5.705221e-03 | 2.092351 | 2.423428 | 2.489325 | 2.109492 | 2.082643 | -7.461640e-04 | 2.215685 | 2.358296 | 2.111511 | -7.287352e-03 |
In [ ]:
#Pick which weather station you want to use. Below is a 3D visualization of the temperatures for that year.
In [19]:
#Drop the DATE and MONTH data as those numbers are not scaled with the rest.
notempyear = dfyear.drop(['DATE','MONTH'], axis=1)
In [15]:
#Plot ALL weather data for all stations for a year
#X = weather station
#Y = day of the year
#Z = temperature
#you can click/hold in the graph below to rotate!
fig = go.Figure(data=[go.Surface(z=notempyear.values)])
fig.update_layout(title='Temperatures over time', autosize=False,
width=600, height=600)
fig.show()
In [11]:
#We need to make an index for the year. Create a set of data from 1 to 365 (or to 366 if it's a leap year!)
#We'll scale this by 100 as the index is made. This will help teh gradient descent converge 366 = 3.66
i = np.arange(0.01,3.66,0.01) #<---needs to be one GREATER than the total number of days
index = pd.DataFrame(data = i, columns = ['index'])
index
Out[11]:
| index | |
|---|---|
| 0 | 0.01 |
| 1 | 0.02 |
| 2 | 0.03 |
| 3 | 0.04 |
| 4 | 0.05 |
| ... | ... |
| 360 | 3.61 |
| 361 | 3.62 |
| 362 | 3.63 |
| 363 | 3.64 |
| 364 | 3.65 |
365 rows × 1 columns
In [18]:
n_rows = dfyear.shape[0]
n_rows
Out[18]:
365
In [19]:
#This will translate your chosen weather data into the X and y datasets needed for the optimization function.
X=index.to_numpy().reshape(n_rows,1)
X
Out[19]:
array([[0.01],
[0.02],
[0.03],
[0.04],
[0.05],
[0.06],
[0.07],
[0.08],
[0.09],
[0.1 ],
[0.11],
[0.12],
[0.13],
[0.14],
[0.15],
[0.16],
[0.17],
[0.18],
[0.19],
[0.2 ],
[0.21],
[0.22],
[0.23],
[0.24],
[0.25],
[0.26],
[0.27],
[0.28],
[0.29],
[0.3 ],
[0.31],
[0.32],
[0.33],
[0.34],
[0.35],
[0.36],
[0.37],
[0.38],
[0.39],
[0.4 ],
[0.41],
[0.42],
[0.43],
[0.44],
[0.45],
[0.46],
[0.47],
[0.48],
[0.49],
[0.5 ],
[0.51],
[0.52],
[0.53],
[0.54],
[0.55],
[0.56],
[0.57],
[0.58],
[0.59],
[0.6 ],
[0.61],
[0.62],
[0.63],
[0.64],
[0.65],
[0.66],
[0.67],
[0.68],
[0.69],
[0.7 ],
[0.71],
[0.72],
[0.73],
[0.74],
[0.75],
[0.76],
[0.77],
[0.78],
[0.79],
[0.8 ],
[0.81],
[0.82],
[0.83],
[0.84],
[0.85],
[0.86],
[0.87],
[0.88],
[0.89],
[0.9 ],
[0.91],
[0.92],
[0.93],
[0.94],
[0.95],
[0.96],
[0.97],
[0.98],
[0.99],
[1. ],
[1.01],
[1.02],
[1.03],
[1.04],
[1.05],
[1.06],
[1.07],
[1.08],
[1.09],
[1.1 ],
[1.11],
[1.12],
[1.13],
[1.14],
[1.15],
[1.16],
[1.17],
[1.18],
[1.19],
[1.2 ],
[1.21],
[1.22],
[1.23],
[1.24],
[1.25],
[1.26],
[1.27],
[1.28],
[1.29],
[1.3 ],
[1.31],
[1.32],
[1.33],
[1.34],
[1.35],
[1.36],
[1.37],
[1.38],
[1.39],
[1.4 ],
[1.41],
[1.42],
[1.43],
[1.44],
[1.45],
[1.46],
[1.47],
[1.48],
[1.49],
[1.5 ],
[1.51],
[1.52],
[1.53],
[1.54],
[1.55],
[1.56],
[1.57],
[1.58],
[1.59],
[1.6 ],
[1.61],
[1.62],
[1.63],
[1.64],
[1.65],
[1.66],
[1.67],
[1.68],
[1.69],
[1.7 ],
[1.71],
[1.72],
[1.73],
[1.74],
[1.75],
[1.76],
[1.77],
[1.78],
[1.79],
[1.8 ],
[1.81],
[1.82],
[1.83],
[1.84],
[1.85],
[1.86],
[1.87],
[1.88],
[1.89],
[1.9 ],
[1.91],
[1.92],
[1.93],
[1.94],
[1.95],
[1.96],
[1.97],
[1.98],
[1.99],
[2. ],
[2.01],
[2.02],
[2.03],
[2.04],
[2.05],
[2.06],
[2.07],
[2.08],
[2.09],
[2.1 ],
[2.11],
[2.12],
[2.13],
[2.14],
[2.15],
[2.16],
[2.17],
[2.18],
[2.19],
[2.2 ],
[2.21],
[2.22],
[2.23],
[2.24],
[2.25],
[2.26],
[2.27],
[2.28],
[2.29],
[2.3 ],
[2.31],
[2.32],
[2.33],
[2.34],
[2.35],
[2.36],
[2.37],
[2.38],
[2.39],
[2.4 ],
[2.41],
[2.42],
[2.43],
[2.44],
[2.45],
[2.46],
[2.47],
[2.48],
[2.49],
[2.5 ],
[2.51],
[2.52],
[2.53],
[2.54],
[2.55],
[2.56],
[2.57],
[2.58],
[2.59],
[2.6 ],
[2.61],
[2.62],
[2.63],
[2.64],
[2.65],
[2.66],
[2.67],
[2.68],
[2.69],
[2.7 ],
[2.71],
[2.72],
[2.73],
[2.74],
[2.75],
[2.76],
[2.77],
[2.78],
[2.79],
[2.8 ],
[2.81],
[2.82],
[2.83],
[2.84],
[2.85],
[2.86],
[2.87],
[2.88],
[2.89],
[2.9 ],
[2.91],
[2.92],
[2.93],
[2.94],
[2.95],
[2.96],
[2.97],
[2.98],
[2.99],
[3. ],
[3.01],
[3.02],
[3.03],
[3.04],
[3.05],
[3.06],
[3.07],
[3.08],
[3.09],
[3.1 ],
[3.11],
[3.12],
[3.13],
[3.14],
[3.15],
[3.16],
[3.17],
[3.18],
[3.19],
[3.2 ],
[3.21],
[3.22],
[3.23],
[3.24],
[3.25],
[3.26],
[3.27],
[3.28],
[3.29],
[3.3 ],
[3.31],
[3.32],
[3.33],
[3.34],
[3.35],
[3.36],
[3.37],
[3.38],
[3.39],
[3.4 ],
[3.41],
[3.42],
[3.43],
[3.44],
[3.45],
[3.46],
[3.47],
[3.48],
[3.49],
[3.5 ],
[3.51],
[3.52],
[3.53],
[3.54],
[3.55],
[3.56],
[3.57],
[3.58],
[3.59],
[3.6 ],
[3.61],
[3.62],
[3.63],
[3.64],
[3.65]])
In [20]:
ones = np.ones((n_rows,1))
ones.shape
Out[20]:
(365, 1)
In [21]:
#Represent x_0 as a vector of 1s for vector computation
ones = np.ones((n_rows,1))
X = np.concatenate((ones, index.to_numpy().reshape(n_rows, 1)), axis=1)
X
Out[21]:
array([[1. , 0.01],
[1. , 0.02],
[1. , 0.03],
[1. , 0.04],
[1. , 0.05],
[1. , 0.06],
[1. , 0.07],
[1. , 0.08],
[1. , 0.09],
[1. , 0.1 ],
[1. , 0.11],
[1. , 0.12],
[1. , 0.13],
[1. , 0.14],
[1. , 0.15],
[1. , 0.16],
[1. , 0.17],
[1. , 0.18],
[1. , 0.19],
[1. , 0.2 ],
[1. , 0.21],
[1. , 0.22],
[1. , 0.23],
[1. , 0.24],
[1. , 0.25],
[1. , 0.26],
[1. , 0.27],
[1. , 0.28],
[1. , 0.29],
[1. , 0.3 ],
[1. , 0.31],
[1. , 0.32],
[1. , 0.33],
[1. , 0.34],
[1. , 0.35],
[1. , 0.36],
[1. , 0.37],
[1. , 0.38],
[1. , 0.39],
[1. , 0.4 ],
[1. , 0.41],
[1. , 0.42],
[1. , 0.43],
[1. , 0.44],
[1. , 0.45],
[1. , 0.46],
[1. , 0.47],
[1. , 0.48],
[1. , 0.49],
[1. , 0.5 ],
[1. , 0.51],
[1. , 0.52],
[1. , 0.53],
[1. , 0.54],
[1. , 0.55],
[1. , 0.56],
[1. , 0.57],
[1. , 0.58],
[1. , 0.59],
[1. , 0.6 ],
[1. , 0.61],
[1. , 0.62],
[1. , 0.63],
[1. , 0.64],
[1. , 0.65],
[1. , 0.66],
[1. , 0.67],
[1. , 0.68],
[1. , 0.69],
[1. , 0.7 ],
[1. , 0.71],
[1. , 0.72],
[1. , 0.73],
[1. , 0.74],
[1. , 0.75],
[1. , 0.76],
[1. , 0.77],
[1. , 0.78],
[1. , 0.79],
[1. , 0.8 ],
[1. , 0.81],
[1. , 0.82],
[1. , 0.83],
[1. , 0.84],
[1. , 0.85],
[1. , 0.86],
[1. , 0.87],
[1. , 0.88],
[1. , 0.89],
[1. , 0.9 ],
[1. , 0.91],
[1. , 0.92],
[1. , 0.93],
[1. , 0.94],
[1. , 0.95],
[1. , 0.96],
[1. , 0.97],
[1. , 0.98],
[1. , 0.99],
[1. , 1. ],
[1. , 1.01],
[1. , 1.02],
[1. , 1.03],
[1. , 1.04],
[1. , 1.05],
[1. , 1.06],
[1. , 1.07],
[1. , 1.08],
[1. , 1.09],
[1. , 1.1 ],
[1. , 1.11],
[1. , 1.12],
[1. , 1.13],
[1. , 1.14],
[1. , 1.15],
[1. , 1.16],
[1. , 1.17],
[1. , 1.18],
[1. , 1.19],
[1. , 1.2 ],
[1. , 1.21],
[1. , 1.22],
[1. , 1.23],
[1. , 1.24],
[1. , 1.25],
[1. , 1.26],
[1. , 1.27],
[1. , 1.28],
[1. , 1.29],
[1. , 1.3 ],
[1. , 1.31],
[1. , 1.32],
[1. , 1.33],
[1. , 1.34],
[1. , 1.35],
[1. , 1.36],
[1. , 1.37],
[1. , 1.38],
[1. , 1.39],
[1. , 1.4 ],
[1. , 1.41],
[1. , 1.42],
[1. , 1.43],
[1. , 1.44],
[1. , 1.45],
[1. , 1.46],
[1. , 1.47],
[1. , 1.48],
[1. , 1.49],
[1. , 1.5 ],
[1. , 1.51],
[1. , 1.52],
[1. , 1.53],
[1. , 1.54],
[1. , 1.55],
[1. , 1.56],
[1. , 1.57],
[1. , 1.58],
[1. , 1.59],
[1. , 1.6 ],
[1. , 1.61],
[1. , 1.62],
[1. , 1.63],
[1. , 1.64],
[1. , 1.65],
[1. , 1.66],
[1. , 1.67],
[1. , 1.68],
[1. , 1.69],
[1. , 1.7 ],
[1. , 1.71],
[1. , 1.72],
[1. , 1.73],
[1. , 1.74],
[1. , 1.75],
[1. , 1.76],
[1. , 1.77],
[1. , 1.78],
[1. , 1.79],
[1. , 1.8 ],
[1. , 1.81],
[1. , 1.82],
[1. , 1.83],
[1. , 1.84],
[1. , 1.85],
[1. , 1.86],
[1. , 1.87],
[1. , 1.88],
[1. , 1.89],
[1. , 1.9 ],
[1. , 1.91],
[1. , 1.92],
[1. , 1.93],
[1. , 1.94],
[1. , 1.95],
[1. , 1.96],
[1. , 1.97],
[1. , 1.98],
[1. , 1.99],
[1. , 2. ],
[1. , 2.01],
[1. , 2.02],
[1. , 2.03],
[1. , 2.04],
[1. , 2.05],
[1. , 2.06],
[1. , 2.07],
[1. , 2.08],
[1. , 2.09],
[1. , 2.1 ],
[1. , 2.11],
[1. , 2.12],
[1. , 2.13],
[1. , 2.14],
[1. , 2.15],
[1. , 2.16],
[1. , 2.17],
[1. , 2.18],
[1. , 2.19],
[1. , 2.2 ],
[1. , 2.21],
[1. , 2.22],
[1. , 2.23],
[1. , 2.24],
[1. , 2.25],
[1. , 2.26],
[1. , 2.27],
[1. , 2.28],
[1. , 2.29],
[1. , 2.3 ],
[1. , 2.31],
[1. , 2.32],
[1. , 2.33],
[1. , 2.34],
[1. , 2.35],
[1. , 2.36],
[1. , 2.37],
[1. , 2.38],
[1. , 2.39],
[1. , 2.4 ],
[1. , 2.41],
[1. , 2.42],
[1. , 2.43],
[1. , 2.44],
[1. , 2.45],
[1. , 2.46],
[1. , 2.47],
[1. , 2.48],
[1. , 2.49],
[1. , 2.5 ],
[1. , 2.51],
[1. , 2.52],
[1. , 2.53],
[1. , 2.54],
[1. , 2.55],
[1. , 2.56],
[1. , 2.57],
[1. , 2.58],
[1. , 2.59],
[1. , 2.6 ],
[1. , 2.61],
[1. , 2.62],
[1. , 2.63],
[1. , 2.64],
[1. , 2.65],
[1. , 2.66],
[1. , 2.67],
[1. , 2.68],
[1. , 2.69],
[1. , 2.7 ],
[1. , 2.71],
[1. , 2.72],
[1. , 2.73],
[1. , 2.74],
[1. , 2.75],
[1. , 2.76],
[1. , 2.77],
[1. , 2.78],
[1. , 2.79],
[1. , 2.8 ],
[1. , 2.81],
[1. , 2.82],
[1. , 2.83],
[1. , 2.84],
[1. , 2.85],
[1. , 2.86],
[1. , 2.87],
[1. , 2.88],
[1. , 2.89],
[1. , 2.9 ],
[1. , 2.91],
[1. , 2.92],
[1. , 2.93],
[1. , 2.94],
[1. , 2.95],
[1. , 2.96],
[1. , 2.97],
[1. , 2.98],
[1. , 2.99],
[1. , 3. ],
[1. , 3.01],
[1. , 3.02],
[1. , 3.03],
[1. , 3.04],
[1. , 3.05],
[1. , 3.06],
[1. , 3.07],
[1. , 3.08],
[1. , 3.09],
[1. , 3.1 ],
[1. , 3.11],
[1. , 3.12],
[1. , 3.13],
[1. , 3.14],
[1. , 3.15],
[1. , 3.16],
[1. , 3.17],
[1. , 3.18],
[1. , 3.19],
[1. , 3.2 ],
[1. , 3.21],
[1. , 3.22],
[1. , 3.23],
[1. , 3.24],
[1. , 3.25],
[1. , 3.26],
[1. , 3.27],
[1. , 3.28],
[1. , 3.29],
[1. , 3.3 ],
[1. , 3.31],
[1. , 3.32],
[1. , 3.33],
[1. , 3.34],
[1. , 3.35],
[1. , 3.36],
[1. , 3.37],
[1. , 3.38],
[1. , 3.39],
[1. , 3.4 ],
[1. , 3.41],
[1. , 3.42],
[1. , 3.43],
[1. , 3.44],
[1. , 3.45],
[1. , 3.46],
[1. , 3.47],
[1. , 3.48],
[1. , 3.49],
[1. , 3.5 ],
[1. , 3.51],
[1. , 3.52],
[1. , 3.53],
[1. , 3.54],
[1. , 3.55],
[1. , 3.56],
[1. , 3.57],
[1. , 3.58],
[1. , 3.59],
[1. , 3.6 ],
[1. , 3.61],
[1. , 3.62],
[1. , 3.63],
[1. , 3.64],
[1. , 3.65]])
In [22]:
y=dfyear['BASEL_temp_mean'].to_numpy().reshape(n_rows,1) #<----INSERT WEATHER STATION HERE
In [23]:
X.shape, y.shape
Out[23]:
((365, 2), (365, 1))
In [33]:
#Look at one year of temperature data over time
plt.scatter(x=index['index'], y=dfyear['BUDAPEST_temp_mean']) #<----INSERT WEATHER STATION HERE
plt.xlabel('X'); plt.ylabel('y');
plt.title('Input dataset');
In [29]:
#what is the min temperature? (Note gradient descent is not actually finding this number)
dfyear['BUDAPEST_temp_mean'].min()
Out[29]:
-0.005166671
In [30]:
#what is the max temperature? (Note gradient descent is not actually finding this number)
dfyear['BUDAPEST_temp_mean'].max()
Out[30]:
-0.005166671
????????????????????????????????¶
In [31]:
dfyear
Out[31]:
| DATE | MONTH | BASEL_temp_mean | BELGRADE_temp_mean | BUDAPEST_temp_mean | DEBILT_temp_mean | DUSSELDORF_temp_mean | GDANSK_temp_mean | HEATHROW_temp_mean | KASSEL_temp_mean | LJUBLJANA_temp_mean | MAASTRICHT_temp_mean | MADRID_temp_mean | MUNCHENB_temp_mean | OSLO_temp_mean | ROMA_temp_mean | SONNBLICK_temp_mean | STOCKHOLM_temp_mean | TOURS_temp_mean | VALENTIA_temp_mean | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 22281 | 20210101 | 1 | -1.112591 | -0.653703 | -0.005167 | -1.111156 | -1.239786 | -1.272886 | -1.713826 | -0.005705 | -0.995445 | -1.222313 | -1.473528 | -1.107756 | -0.744778 | -0.000746 | -1.206588 | -0.540359 | -1.496644 | -0.007287 |
| 22282 | 20210102 | 1 | -1.221236 | -0.551561 | -0.005167 | -1.016223 | -1.195607 | -1.081061 | -1.504171 | -0.005705 | -0.741156 | -1.282325 | -1.716151 | -1.271132 | -0.671969 | -0.000746 | -0.884835 | -0.702087 | -1.763333 | -0.007287 |
| 22283 | 20210103 | 1 | -1.384204 | -0.415371 | -0.005167 | -1.063690 | -1.136700 | -1.042696 | -1.381872 | -0.005705 | -0.862246 | -1.207310 | -1.608319 | -1.208295 | -0.951071 | -0.000746 | -1.016461 | -0.938458 | -1.465269 | -0.007287 |
| 22284 | 20210104 | 1 | -1.329882 | -0.517514 | -0.005167 | -1.142801 | -1.269240 | -0.940389 | -1.259573 | -0.005705 | -0.886464 | -1.297328 | -1.648756 | -1.245997 | -1.145228 | -0.000746 | -1.191962 | -0.863814 | -1.575082 | -0.007287 |
| 22285 | 20210105 | 1 | -1.384204 | -0.699100 | -0.005167 | -1.126979 | -1.239786 | -0.799717 | -1.399343 | -0.005705 | -0.874355 | -1.267322 | -1.810505 | -1.296267 | -1.582083 | -0.000746 | -1.382089 | -0.975779 | -1.496644 | -0.007287 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 22641 | 20211227 | 12 | -0.515042 | -1.277907 | -0.005167 | -0.699778 | -0.621268 | -2.180860 | -0.473366 | -0.005705 | -0.910682 | -0.292124 | -0.368242 | -0.554792 | -1.812645 | -0.000746 | -0.606959 | -1.398759 | -0.037694 | -0.007287 |
| 22642 | 20211228 | 12 | -0.012558 | -1.130368 | -0.005167 | -0.193467 | -0.297282 | -1.925093 | -0.211297 | -0.005705 | -0.595848 | -0.142094 | -0.476075 | -0.328579 | -1.387925 | -0.000746 | -0.387582 | -0.926017 | 0.025056 | -0.007287 |
| 22643 | 20211229 | 12 | -0.107623 | -0.846639 | -0.005167 | -0.082711 | -0.150016 | -1.285675 | -0.001641 | -0.005705 | -0.971227 | -0.052075 | -0.691740 | -0.114934 | -1.497139 | -0.000746 | -0.475333 | -0.826492 | 0.134870 | -0.007287 |
| 22644 | 20211230 | 12 | 0.354120 | -0.846639 | -0.005167 | 0.455245 | 0.424323 | -0.876447 | 0.208014 | -0.005705 | -0.910682 | 0.503038 | -0.691740 | 0.400329 | -0.853992 | -0.000746 | 0.051171 | -0.577681 | 0.307434 | -0.007287 |
| 22645 | 20211231 | 12 | -0.216268 | -0.392673 | -0.005167 | 0.439422 | 0.380143 | -0.237030 | 0.452612 | -0.005705 | -1.068099 | 0.383013 | -0.637824 | 0.412896 | -0.963205 | -0.000746 | 0.489923 | -0.415953 | -0.225946 | -0.007287 |
365 rows × 20 columns
In [32]:
dfyear.describe()
Out[32]:
| DATE | MONTH | BASEL_temp_mean | BELGRADE_temp_mean | BUDAPEST_temp_mean | DEBILT_temp_mean | DUSSELDORF_temp_mean | GDANSK_temp_mean | HEATHROW_temp_mean | KASSEL_temp_mean | LJUBLJANA_temp_mean | MAASTRICHT_temp_mean | MADRID_temp_mean | MUNCHENB_temp_mean | OSLO_temp_mean | ROMA_temp_mean | SONNBLICK_temp_mean | STOCKHOLM_temp_mean | TOURS_temp_mean | VALENTIA_temp_mean | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 3.650000e+02 | 365.000000 | 365.000000 | 365.000000 | 3.650000e+02 | 365.000000 | 365.000000 | 365.000000 | 365.000000 | 3.650000e+02 | 365.000000 | 365.000000 | 365.000000 | 365.000000 | 365.000000 | 3.650000e+02 | 365.000000 | 365.000000 | 365.000000 | 3.650000e+02 |
| mean | 2.021067e+07 | 6.526027 | 0.032909 | 0.128766 | -5.166671e-03 | 0.072520 | 0.013712 | 0.088705 | 0.085045 | -5.705221e-03 | 0.101863 | 0.062647 | 0.089456 | 0.090138 | 0.102767 | -7.461640e-04 | 0.045401 | 0.099155 | 0.116518 | -7.287352e-03 |
| std | 3.454755e+02 | 3.452584 | 0.949455 | 1.005278 | 1.042263e-17 | 0.966594 | 0.975583 | 1.066883 | 1.010767 | 3.039933e-17 | 1.014904 | 0.964474 | 1.009745 | 0.926124 | 1.061531 | 4.777038e-18 | 1.026706 | 1.048349 | 0.985446 | 4.603327e-17 |
| min | 2.021010e+07 | 1.000000 | -2.212624 | -2.049648 | -5.166671e-03 | -2.503513 | -2.668269 | -2.986526 | -2.185550 | -5.705221e-03 | -1.855184 | -2.572588 | -2.470980 | -2.125714 | -2.152421 | -7.461640e-04 | -2.815348 | -2.294481 | -2.281025 | -7.287352e-03 |
| 25% | 2.021040e+07 | 4.000000 | -0.745913 | -0.631005 | -5.166671e-03 | -0.683956 | -0.680175 | -0.748564 | -0.683021 | -5.705221e-03 | -0.704829 | -0.667200 | -0.664782 | -0.667898 | -0.574890 | -7.461640e-04 | -0.636209 | -0.577681 | -0.649512 | -7.287352e-03 |
| 50% | 2.021070e+07 | 7.000000 | -0.012558 | -0.040850 | -5.166671e-03 | 0.043867 | -0.061656 | -0.006839 | -0.001641 | -5.705221e-03 | -0.087270 | -0.022069 | -0.085181 | 0.035875 | 0.068257 | -7.461640e-04 | 0.036546 | 0.019467 | 0.056432 | -7.287352e-03 |
| 75% | 2.021100e+07 | 10.000000 | 0.883765 | 0.935177 | -5.166671e-03 | 0.993200 | 0.880848 | 1.029017 | 0.994221 | -5.705221e-03 | 0.941995 | 0.923123 | 0.817918 | 0.865322 | 1.063315 | -7.461640e-04 | 0.855551 | 0.940070 | 0.966314 | -7.287352e-03 |
| max | 2.021123e+07 | 12.000000 | 2.078863 | 2.263027 | -5.166671e-03 | 2.211512 | 2.559684 | 2.320641 | 2.391923 | -5.705221e-03 | 2.092351 | 2.423428 | 2.489325 | 2.109492 | 2.082643 | -7.461640e-04 | 2.215685 | 2.358296 | 2.111511 | -7.287352e-03 |
(UN)fortunately, the data of year & location I chose - 2021, Budapest is not normal. I checked the data in 2021 again, and found that some location like Kassel, Roma, etc. have same issue.¶
We don't know exactly now, but we record this because it may affect machine learning.¶
In [24]:
# change the location from Budapest to BASEL
y=dfyear['BASEL_temp_mean'].to_numpy().reshape(n_rows,1) #<----INSERT WEATHER STATION HERE
X.shape, y.shape
#Look at one year of temperature data over time
plt.scatter(x=index['index'], y=dfyear['BASEL_temp_mean']) #<----INSERT WEATHER STATION HERE
plt.xlabel('X'); plt.ylabel('y');
plt.title('Input dataset');
In [25]:
#This computes the loss function for the gradiant descent. DO NOT CHANGE!
def compute_cost(X, y, theta=np.array([[0],[0]])):
"""Given covariate matrix X, the prediction results y and coefficients theta
compute the loss"""
m = len(y)
J=0 # initialize loss to zero
# reshape theta
theta=theta.reshape(2,1)
# calculate the hypothesis - y_hat
h_x = np.dot(X,theta)
#print(h_x)
# subtract y from y_hat, square and sum
error_term = sum((h_x - y)**2)
# divide by twice the number of samples - standard practice.
loss = error_term/(2*m)
return loss
In [26]:
compute_cost(X,y)
Out[26]:
array([0.45003918])
In [27]:
#This is the gradiant descent function. DO NOT CHANGE!
def gradient_descent(X, y, theta=np.array([[0],[0]]),
alpha=0.01, num_iterations=1500):
"""
Solve for theta using Gradient Descent optimiztion technique.
Alpha is the learning rate
"""
m = len(y)
J_history = []
theta0_history = []
theta1_history = []
theta = theta.reshape(2,1)
for i in range(num_iterations):
error = (np.dot(X, theta) - y)
term0 = (alpha/m) * sum(error* X[:,0].reshape(m,1))
term1 = (alpha/m) * sum(error* X[:,1].reshape(m,1))
# update theta
term_vector = np.array([[term0],[term1]])
#print(term_vector)
theta = theta - term_vector.reshape(2,1)
# store history values
theta0_history.append(theta[0].tolist()[0])
theta1_history.append(theta[1].tolist()[0])
J_history.append(compute_cost(X,y,theta).tolist()[0])
return (theta, J_history, theta0_history, theta1_history)
In [28]:
%%time
#This runs your data through a gradiant descent for the starting conditions in 'theta_init.'
#You will need to adjust these numbers
num_iterations=30#<---Decide how many iterations you need. Start small and work up. Over 10,000 iterations will take a few seconds.
theta_init=np.array([[1],[-1]]) #<---this is where you put the guess for [theta0], [theta1]. Start with 1 and 1.
alpha=0.05#<---Decide what your step size is. Try values between 0.1 and 0.00001. You will need to adjust your iterations.
#If your solution is not converging, try a smaller step size.
theta, J_history, theta0_history, theta1_history = gradient_descent(X,y, theta_init,
alpha, num_iterations)
theta
CPU times: total: 15.6 ms Wall time: 41.4 ms
Out[28]:
array([[ 0.82303785],
[-0.30862993]])
In [29]:
#This will plot your loss, theta0, and theta1.If the result looks like a straight line, it's not converging on an answer!
#Your loss (red) should be trending toward 0.
fig, ax1 = plt.subplots()
# plot thetas over time
color='tab:blue'
ax1.plot(theta0_history, label='$\\theta_{0}$', linestyle='--', color=color)
ax1.plot(theta1_history, label='$\\theta_{1}$', linestyle='-', color=color)
# ax1.legend()
ax1.set_xlabel('Iterations'); ax1.set_ylabel('$\\theta$', color=color);
ax1.tick_params(axis='y', labelcolor=color)
# plot loss function over time
color='tab:red'
ax2 = ax1.twinx()
ax2.plot(J_history, label='Loss function', color=color)
ax2.set_title('Values of $\\theta$ and $J(\\theta)$ over iterations')
ax2.set_ylabel('Loss: $J(\\theta)$', color=color)
ax1.tick_params(axis='y', labelcolor=color)
# ax2.legend();
fig.legend();
In [30]:
%%time
# theta range
theta0_vals = np.linspace(-10,10,100) #Look in the chart above for the limits of where theta0 and theta1 appear.
theta1_vals = np.linspace(-10,10,100) #Put those values as the first two "linspace" numbers in these lines
#Select with large margins, maybe +/- 10
J_vals = np.zeros((len(theta0_vals), len(theta1_vals)))
# compute cost for each combination of theta
c1=0; c2=0
for i in theta0_vals:
for j in theta1_vals:
t = np.array([i, j])
J_vals[c1][c2] = compute_cost(X, y, t.transpose()).tolist()[0]
c2=c2+1
c1=c1+1
c2=0 # reinitialize to 0
CPU times: total: 719 ms Wall time: 2.73 s
In [31]:
#This figure shows the loss function.
#X = Theta0
#Y - Theta1
#Z = Loss
#Find where it is closest to 0 in X and Y!
#you can click/hold in the graph below to rotate!
fig = go.Figure(data=[go.Surface(x=theta1_vals, y=theta0_vals, z=J_vals)])
fig.update_layout(title='Loss function for different thetas', autosize=True,
width=600, height=600, xaxis_title='theta0',
yaxis_title='theta1')
fig.show()
In [41]:
#Here is the same figure as above, with the line the loss function takes toward the minimum.
#X = Theta0
#Y - Theta1
#Z = Loss
#black line = path of loss function over the iterations.
#Find where it is closest to 0 in X and Y!
#you can click/hold in the graph below to rotate!
line_marker = dict(color='#101010', width=2)
fig = go.Figure()
fig.add_surface(x=theta1_vals, y=theta0_vals, z=J_vals)
fig.add_scatter3d(x=theta1_history, y=theta0_history, z=J_history, line=line_marker, name='')
#The below line adds a graph of just the loss over iterations in a 2D plane
plt.plot(theta0_history, theta1_history, 'r+');
fig.update_layout(title='Loss function for different thetas', autosize=True,
width=600, height=600, xaxis_title='theta0',
yaxis_title='theta1')
fig.show()
In [42]:
#Rerun the optimization above, but this time start closer to the objective!
#Find where the black line ends near the lowest X/Y/Z coordinate and make that your guess below.
num_iterations=30 #<---start with the same iterations as above
theta_init=np.array([[-0.5],[-1.10]]) #<---make a guess as to a more accurate [x],[y] coordinates near the minimum in the graph above.
alpha= 0.01 #<---start with the same step size as above
theta1, J_history1, theta0_history1, theta1_history1 = gradient_descent(X,y, theta_init,
alpha, num_iterations)
In [43]:
#Let's look at the new loss path on the function. It should start much closer to the goal
line_marker = dict(color='#101010', width=2)
fig = go.Figure()
fig.add_surface(x=theta1_vals, y=theta0_vals, z=J_vals)
fig.add_scatter3d(x=theta1_history1, y=theta0_history1, z=J_history1, line=line_marker, name='')
#The below line adds a graph of just the loss over iterations in a 2D plane
plt.plot(theta0_history1, theta1_history1, 'r+');
fig.update_layout(title='Loss function for different thetas', autosize=True,
width=600, height=600, xaxis_title='theta0',
yaxis_title='theta1')
fig.show()
In [44]:
#This plot shows the convergence similar to above, but only in the X/Y plane (there's no height)
plt.contour(theta0_vals, theta1_vals, J_vals, levels = np.logspace(0,10,1000))
plt.xlabel('$\\theta_{0}$'); plt.ylabel("$\\theta_{1}$")
plt.title("Contour plot of loss function for different values of $\\theta$s");
plt.plot(theta0_history1, theta1_history1, 'r+');
In [ ]:
#How well does gradient descent converge? How much do you need to adjust between different weather stations and years?
In [ ]:
Investigate at least three different weather stations and three different years and find values where the gradient descent converges.¶
Take screenshots of the loss function and profiles (similar to the images above) and paste them in a document along with the starting and ending theta0, theta1, iterations, and step sizes for each year and station you investigate.
Bael 2010¶
In [53]:
dfyear = df[df['DATE'].astype(str).str.contains('2010')] #<-----INSERT YEAR HERE
dfyear = df[df['DATE'].astype(str).str.startswith('2010')]
y=dfyear['BASEL_temp_mean'].to_numpy().reshape(n_rows,1) #<----INSERT WEATHER STATION HERE
In [54]:
%%time
#This runs your data through a gradiant descent for the starting conditions in 'theta_init.'
#You will need to adjust these numbers
num_iterations=15 #<---Decide how many iterations you need. Start small and work up. Over 10,000 iterations will take a few seconds.
theta_init=np.array([[-0.5],[1.1]]) #<---this is where you put the guess for [theta0], [theta1]. Start with 1 and 1.
alpha=0.05#<---Decide what your step size is. Try values between 0.1 and 0.00001. You will need to adjust your iterations.
#If your solution is not converging, try a smaller step size.
theta, J_history, theta0_history, theta1_history = gradient_descent(X,y, theta_init,
alpha, num_iterations)
theta
CPU times: total: 0 ns Wall time: 13.3 ms
Out[54]:
array([[-0.74479474],
[ 0.33625032]])
In [55]:
#This will plot your loss, theta0, and theta1.If the result looks like a straight line, it's not converging on an answer!
#Your loss (red) should be trending toward 0.
fig, ax1 = plt.subplots()
# plot thetas over time
color='tab:blue'
ax1.plot(theta0_history, label='$\\theta_{0}$', linestyle='--', color=color)
ax1.plot(theta1_history, label='$\\theta_{1}$', linestyle='-', color=color)
# ax1.legend()
ax1.set_xlabel('Iterations'); ax1.set_ylabel('$\\theta$', color=color);
ax1.tick_params(axis='y', labelcolor=color)
# plot loss function over time
color='tab:red'
ax2 = ax1.twinx()
ax2.plot(J_history, label='Loss function', color=color)
ax2.set_title('Values of $\\theta$ and $J(\\theta)$ over iterations')
ax2.set_ylabel('Loss: $J(\\theta)$', color=color)
ax1.tick_params(axis='y', labelcolor=color)
# ax2.legend();
fig.legend();
In [57]:
line_marker = dict(color='#101010', width=2)
fig = go.Figure()
fig.add_surface(x=theta1_vals, y=theta0_vals, z=J_vals)
fig.add_scatter3d(x=theta1_history, y=theta0_history, z=J_history, line=line_marker, name='')
#The below line adds a graph of just the loss over iterations in a 2D plane
plt.plot(theta0_history, theta1_history, 'r+');
fig.update_layout(title='Loss function for different thetas', autosize=True,
width=600, height=600, xaxis_title='theta0',
yaxis_title='theta1')
fig.show()
BASEL 1998¶
In [ ]:
In [58]:
dfyear = df[df['DATE'].astype(str).str.contains('1998')] #<-----INSERT YEAR HERE
dfyear = df[df['DATE'].astype(str).str.startswith('1998')]
y=dfyear['BASEL_temp_mean'].to_numpy().reshape(n_rows,1) #<----INSERT WEATHER STATION HERE
In [59]:
%%time
#This runs your data through a gradiant descent for the starting conditions in 'theta_init.'
#You will need to adjust these numbers
num_iterations=30 #<---Decide how many iterations you need. Start small and work up. Over 10,000 iterations will take a few seconds.
theta_init=np.array([[-0.1],[0.5]]) #<---this is where you put the guess for [theta0], [theta1]. Start with 1 and 1.
alpha=0.01 #<---Decide what your step size is. Try values between 0.1 and 0.00001. You will need to adjust your iterations.
#If your solution is not converging, try a smaller step size.
theta, J_history, theta0_history, theta1_history = gradient_descent(X,y, theta_init,
alpha, num_iterations)
theta
CPU times: total: 15.6 ms Wall time: 32.3 ms
Out[59]:
array([[-0.20958018],
[ 0.20838722]])
In [60]:
#This will plot your loss, theta0, and theta1.If the result looks like a straight line, it's not converging on an answer!
#Your loss (red) should be trending toward 0.
fig, ax1 = plt.subplots()
# plot thetas over time
color='tab:blue'
ax1.plot(theta0_history, label='$\\theta_{0}$', linestyle='--', color=color)
ax1.plot(theta1_history, label='$\\theta_{1}$', linestyle='-', color=color)
# ax1.legend()
ax1.set_xlabel('Iterations'); ax1.set_ylabel('$\\theta$', color=color);
ax1.tick_params(axis='y', labelcolor=color)
# plot loss function over time
color='tab:red'
ax2 = ax1.twinx()
ax2.plot(J_history, label='Loss function', color=color)
ax2.set_title('Values of $\\theta$ and $J(\\theta)$ over iterations')
ax2.set_ylabel('Loss: $J(\\theta)$', color=color)
ax1.tick_params(axis='y', labelcolor=color)
# ax2.legend();
fig.legend();
In [46]:
line_marker = dict(color='#101010', width=2)
fig = go.Figure()
fig.add_surface(x=theta1_vals, y=theta0_vals, z=J_vals)
fig.add_scatter3d(x=theta1_history, y=theta0_history, z=J_history, line=line_marker, name='')
#The below line adds a graph of just the loss over iterations in a 2D plane
plt.plot(theta0_history, theta1_history, 'r+');
fig.update_layout(title='Loss function for different thetas', autosize=True,
width=600, height=600, xaxis_title='theta0',
yaxis_title='theta1')
fig.show()
STOCLHOLM 1: 2019¶
In [ ]:
In [62]:
dfyear = df[df['DATE'].astype(str).str.contains('2019')] #<-----INSERT YEAR HERE
dfyear = df[df['DATE'].astype(str).str.startswith('2019')]
y=dfyear['STOCKHOLM_temp_mean'].to_numpy().reshape(n_rows,1) #<----INSERT WEATHER STATION HERE
In [63]:
%%time
#This runs your data through a gradiant descent for the starting conditions in 'theta_init.'
#You will need to adjust these numbers
num_iterations=30 #<---Decide how many iterations you need. Start small and work up. Over 10,000 iterations will take a few seconds.
theta_init=np.array([[1],[1]]) #<---this is where you put the guess for [theta0], [theta1]. Start with 1 and 1.
alpha=0.05#<---Decide what your step size is. Try values between 0.1 and 0.00001. You will need to adjust your iterations.
#If your solution is not converging, try a smaller step size.
theta, J_history, theta0_history, theta1_history = gradient_descent(X,y, theta_init,
alpha, num_iterations)
theta
CPU times: total: 15.6 ms Wall time: 25.2 ms
Out[63]:
array([[ 0.31185703],
[-0.02013077]])
In [64]:
#This will plot your loss, theta0, and theta1.If the result looks like a straight line, it's not converging on an answer!
#Your loss (red) should be trending toward 0.
fig, ax1 = plt.subplots()
# plot thetas over time
color='tab:blue'
ax1.plot(theta0_history, label='$\\theta_{0}$', linestyle='--', color=color)
ax1.plot(theta1_history, label='$\\theta_{1}$', linestyle='-', color=color)
# ax1.legend()
ax1.set_xlabel('Iterations'); ax1.set_ylabel('$\\theta$', color=color);
ax1.tick_params(axis='y', labelcolor=color)
# plot loss function over time
color='tab:red'
ax2 = ax1.twinx()
ax2.plot(J_history, label='Loss function', color=color)
ax2.set_title('Values of $\\theta$ and $J(\\theta)$ over iterations')
ax2.set_ylabel('Loss: $J(\\theta)$', color=color)
ax1.tick_params(axis='y', labelcolor=color)
# ax2.legend();
fig.legend();
In [65]:
line_marker = dict(color='#101010', width=2)
fig = go.Figure()
fig.add_surface(x=theta1_vals, y=theta0_vals, z=J_vals)
fig.add_scatter3d(x=theta1_history, y=theta0_history, z=J_history, line=line_marker, name='')
#The below line adds a graph of just the loss over iterations in a 2D plane
plt.plot(theta0_history, theta1_history, 'r+');
fig.update_layout(title='Loss function for different thetas', autosize=True,
width=600, height=600, xaxis_title='theta0',
yaxis_title='theta1')
fig.show()
STOCKHOLM 2: 2005¶
In [70]:
dfyear = df[df['DATE'].astype(str).str.contains('2005')] #<-----INSERT YEAR HERE
dfyear = df[df['DATE'].astype(str).str.startswith('2005')]
y=dfyear['STOCKHOLM_temp_mean'].to_numpy().reshape(n_rows,1) #<----INSERT WEATHER STATION HERE
In [71]:
%%time
#This runs your data through a gradiant descent for the starting conditions in 'theta_init.'
#You will need to adjust these numbers
num_iterations=15 #<---Decide how many iterations you need. Start small and work up. Over 10,000 iterations will take a few seconds.
theta_init=np.array([[-2],[0.1]]) #<---this is where you put the guess for [theta0], [theta1]. Start with 1 and 1.
alpha=0.05#<---Decide what your step size is. Try values between 0.1 and 0.00001. You will need to adjust your iterations.
#If your solution is not converging, try a smaller step size.
theta, J_history, theta0_history, theta1_history = gradient_descent(X,y, theta_init,
alpha, num_iterations)
theta
CPU times: total: 0 ns Wall time: 14.8 ms
Out[71]:
array([[-1.49680181],
[ 0.71812687]])
In [72]:
#This will plot your loss, theta0, and theta1.If the result looks like a straight line, it's not converging on an answer!
#Your loss (red) should be trending toward 0.
fig, ax1 = plt.subplots()
# plot thetas over time
color='tab:blue'
ax1.plot(theta0_history, label='$\\theta_{0}$', linestyle='--', color=color)
ax1.plot(theta1_history, label='$\\theta_{1}$', linestyle='-', color=color)
# ax1.legend()
ax1.set_xlabel('Iterations'); ax1.set_ylabel('$\\theta$', color=color);
ax1.tick_params(axis='y', labelcolor=color)
# plot loss function over time
color='tab:red'
ax2 = ax1.twinx()
ax2.plot(J_history, label='Loss function', color=color)
ax2.set_title('Values of $\\theta$ and $J(\\theta)$ over iterations')
ax2.set_ylabel('Loss: $J(\\theta)$', color=color)
ax1.tick_params(axis='y', labelcolor=color)
# ax2.legend();
fig.legend();
In [73]:
line_marker = dict(color='#101010', width=2)
fig = go.Figure()
fig.add_surface(x=theta1_vals, y=theta0_vals, z=J_vals)
fig.add_scatter3d(x=theta1_history, y=theta0_history, z=J_history, line=line_marker, name='')
#The below line adds a graph of just the loss over iterations in a 2D plane
plt.plot(theta0_history, theta1_history, 'r+');
fig.update_layout(title='Loss function for different thetas', autosize=True,
width=600, height=600, xaxis_title='theta0',
yaxis_title='theta1')
fig.show()
STOCKHOLM 3: 1995¶
In [ ]:
In [74]:
dfyear = df[df['DATE'].astype(str).str.contains('1995')] #<-----INSERT YEAR HERE
dfyear = df[df['DATE'].astype(str).str.startswith('1995')]
y=dfyear['STOCKHOLM_temp_mean'].to_numpy().reshape(n_rows,1) #<----INSERT WEATHER STATION HERE
In [75]:
%%time
#This runs your data through a gradiant descent for the starting conditions in 'theta_init.'
#You will need to adjust these numbers
num_iterations=15 #<---Decide how many iterations you need. Start small and work up. Over 10,000 iterations will take a few seconds.
theta_init=np.array([[-0.5],[1.1]]) #<---this is where you put the guess for [theta0], [theta1]. Start with 1 and 1.
alpha=0.05#<---Decide what your step size is. Try values between 0.1 and 0.00001. You will need to adjust your iterations.
#If your solution is not converging, try a smaller step size.
theta, J_history, theta0_history, theta1_history = gradient_descent(X,y, theta_init,
alpha, num_iterations)
theta
CPU times: total: 0 ns Wall time: 18.2 ms
Out[75]:
array([[-0.72696776],
[ 0.33896967]])
In [76]:
#This will plot your loss, theta0, and theta1.If the result looks like a straight line, it's not converging on an answer!
#Your loss (red) should be trending toward 0.
fig, ax1 = plt.subplots()
# plot thetas over time
color='tab:blue'
ax1.plot(theta0_history, label='$\\theta_{0}$', linestyle='--', color=color)
ax1.plot(theta1_history, label='$\\theta_{1}$', linestyle='-', color=color)
# ax1.legend()
ax1.set_xlabel('Iterations'); ax1.set_ylabel('$\\theta$', color=color);
ax1.tick_params(axis='y', labelcolor=color)
# plot loss function over time
color='tab:red'
ax2 = ax1.twinx()
ax2.plot(J_history, label='Loss function', color=color)
ax2.set_title('Values of $\\theta$ and $J(\\theta)$ over iterations')
ax2.set_ylabel('Loss: $J(\\theta)$', color=color)
ax1.tick_params(axis='y', labelcolor=color)
# ax2.legend();
fig.legend();
In [77]:
line_marker = dict(color='#101010', width=2)
fig = go.Figure()
fig.add_surface(x=theta1_vals, y=theta0_vals, z=J_vals)
fig.add_scatter3d(x=theta1_history, y=theta0_history, z=J_history, line=line_marker, name='')
#The below line adds a graph of just the loss over iterations in a 2D plane
plt.plot(theta0_history, theta1_history, 'r+');
fig.update_layout(title='Loss function for different thetas', autosize=True,
width=600, height=600, xaxis_title='theta0',
yaxis_title='theta1')
fig.show()
VALENTIA 1: 2021¶
In [ ]:
In [84]:
dfyear = df[df['DATE'].astype(str).str.contains('2021')] #<-----INSERT YEAR HERE
dfyear = df[df['DATE'].astype(str).str.startswith('2021')]
y=dfyear['VALENTIA_temp_mean'].to_numpy().reshape(n_rows,1) #<----INSERT WEATHER STATION HERE
In [88]:
%%time
#This runs your data through a gradiant descent for the starting conditions in 'theta_init.'
#You will need to adjust these numbers
num_iterations=1000 #<---Decide how many iterations you need. Start small and work up. Over 10,000 iterations will take a few seconds.
theta_init=np.array([[0.5],[0.5]]) #<---this is where you put the guess for [theta0], [theta1]. Start with 1 and 1.
alpha=0.0001#<---Decide what your step size is. Try values between 0.1 and 0.00001. You will need to adjust your iterations.
#If your solution is not converging, try a smaller step size.
theta, J_history, theta0_history, theta1_history = gradient_descent(X,y, theta_init,
alpha, num_iterations)
theta
CPU times: total: 312 ms Wall time: 921 ms
Out[88]:
array([[0.3882126 ],
[0.25471709]])
In [89]:
#This will plot your loss, theta0, and theta1.If the result looks like a straight line, it's not converging on an answer!
#Your loss (red) should be trending toward 0.
fig, ax1 = plt.subplots()
# plot thetas over time
color='tab:blue'
ax1.plot(theta0_history, label='$\\theta_{0}$', linestyle='--', color=color)
ax1.plot(theta1_history, label='$\\theta_{1}$', linestyle='-', color=color)
# ax1.legend()
ax1.set_xlabel('Iterations'); ax1.set_ylabel('$\\theta$', color=color);
ax1.tick_params(axis='y', labelcolor=color)
# plot loss function over time
color='tab:red'
ax2 = ax1.twinx()
ax2.plot(J_history, label='Loss function', color=color)
ax2.set_title('Values of $\\theta$ and $J(\\theta)$ over iterations')
ax2.set_ylabel('Loss: $J(\\theta)$', color=color)
ax1.tick_params(axis='y', labelcolor=color)
# ax2.legend();
fig.legend();
In [90]:
line_marker = dict(color='#101010', width=2)
fig = go.Figure()
fig.add_surface(x=theta1_vals, y=theta0_vals, z=J_vals)
fig.add_scatter3d(x=theta1_history, y=theta0_history, z=J_history, line=line_marker, name='')
#The below line adds a graph of just the loss over iterations in a 2D plane
plt.plot(theta0_history, theta1_history, 'r+');
fig.update_layout(title='Loss function for different thetas', autosize=True,
width=600, height=600, xaxis_title='theta0',
yaxis_title='theta1')
fig.show()
In [ ]:
VALENTIA 2: 2007¶
In [ ]:
In [99]:
dfyear = df[df['DATE'].astype(str).str.contains('2007')] #<-----INSERT YEAR HERE
dfyear = df[df['DATE'].astype(str).str.startswith('2007')]
y=dfyear['VALENTIA_temp_mean'].to_numpy().reshape(n_rows,1) #<----INSERT WEATHER STATION HERE
In [100]:
%%time
#This runs your data through a gradiant descent for the starting conditions in 'theta_init.'
#You will need to adjust these numbers
num_iterations=5000 #<---Decide how many iterations you need. Start small and work up. Over 10,000 iterations will take a few seconds.
theta_init=np.array([[0.5],[-1]]) #<---this is where you put the guess for [theta0], [theta1]. Start with 1 and 1.
alpha=0.001#<---Decide what your step size is. Try values between 0.1 and 0.00001. You will need to adjust your iterations.
#If your solution is not converging, try a smaller step size.
theta, J_history, theta0_history, theta1_history = gradient_descent(X,y, theta_init,
alpha, num_iterations)
theta
CPU times: total: 1 s Wall time: 3.76 s
Out[100]:
array([[0.10816721],
[0.1654668 ]])
In [101]:
#This will plot your loss, theta0, and theta1.If the result looks like a straight line, it's not converging on an answer!
#Your loss (red) should be trending toward 0.
fig, ax1 = plt.subplots()
# plot thetas over time
color='tab:blue'
ax1.plot(theta0_history, label='$\\theta_{0}$', linestyle='--', color=color)
ax1.plot(theta1_history, label='$\\theta_{1}$', linestyle='-', color=color)
# ax1.legend()
ax1.set_xlabel('Iterations'); ax1.set_ylabel('$\\theta$', color=color);
ax1.tick_params(axis='y', labelcolor=color)
# plot loss function over time
color='tab:red'
ax2 = ax1.twinx()
ax2.plot(J_history, label='Loss function', color=color)
ax2.set_title('Values of $\\theta$ and $J(\\theta)$ over iterations')
ax2.set_ylabel('Loss: $J(\\theta)$', color=color)
ax1.tick_params(axis='y', labelcolor=color)
# ax2.legend();
fig.legend();
In [102]:
line_marker = dict(color='#101010', width=2)
fig = go.Figure()
fig.add_surface(x=theta1_vals, y=theta0_vals, z=J_vals)
fig.add_scatter3d(x=theta1_history, y=theta0_history, z=J_history, line=line_marker, name='')
#The below line adds a graph of just the loss over iterations in a 2D plane
plt.plot(theta0_history, theta1_history, 'r+');
fig.update_layout(title='Loss function for different thetas', autosize=True,
width=600, height=600, xaxis_title='theta0',
yaxis_title='theta1')
fig.show()
VALENTIA 3: 1998¶
In [ ]:
In [103]:
dfyear = df[df['DATE'].astype(str).str.contains('1998')] #<-----INSERT YEAR HERE
dfyear = df[df['DATE'].astype(str).str.startswith('1998')]
y=dfyear['VALENTIA_temp_mean'].to_numpy().reshape(n_rows,1) #<----INSERT WEATHER STATION HERE
In [104]:
%%time
#This runs your data through a gradiant descent for the starting conditions in 'theta_init.'
#You will need to adjust these numbers
num_iterations=500 #<---Decide how many iterations you need. Start small and work up. Over 10,000 iterations will take a few seconds.
theta_init=np.array([[-0.1],[0.3]]) #<---this is where you put the guess for [theta0], [theta1]. Start with 1 and 1.
alpha=0.001#<---Decide what your step size is. Try values between 0.1 and 0.00001. You will need to adjust your iterations.
#If your solution is not converging, try a smaller step size.
theta, J_history, theta0_history, theta1_history = gradient_descent(X,y, theta_init,
alpha, num_iterations)
theta
CPU times: total: 344 ms Wall time: 552 ms
Out[104]:
array([[-0.15834828],
[ 0.22357141]])
In [52]:
#This will plot your loss, theta0, and theta1.If the result looks like a straight line, it's not converging on an answer!
#Your loss (red) should be trending toward 0.
fig, ax1 = plt.subplots()
# plot thetas over time
color='tab:blue'
ax1.plot(theta0_history, label='$\\theta_{0}$', linestyle='--', color=color)
ax1.plot(theta1_history, label='$\\theta_{1}$', linestyle='-', color=color)
# ax1.legend()
ax1.set_xlabel('Iterations'); ax1.set_ylabel('$\\theta$', color=color);
ax1.tick_params(axis='y', labelcolor=color)
# plot loss function over time
color='tab:red'
ax2 = ax1.twinx()
ax2.plot(J_history, label='Loss function', color=color)
ax2.set_title('Values of $\\theta$ and $J(\\theta)$ over iterations')
ax2.set_ylabel('Loss: $J(\\theta)$', color=color)
ax1.tick_params(axis='y', labelcolor=color)
# ax2.legend();
fig.legend();
In [46]:
line_marker = dict(color='#101010', width=2)
fig = go.Figure()
fig.add_surface(x=theta1_vals, y=theta0_vals, z=J_vals)
fig.add_scatter3d(x=theta1_history, y=theta0_history, z=J_history, line=line_marker, name='')
#The below line adds a graph of just the loss over iterations in a 2D plane
plt.plot(theta0_history, theta1_history, 'r+');
fig.update_layout(title='Loss function for different thetas', autosize=True,
width=600, height=600, xaxis_title='theta0',
yaxis_title='theta1')
fig.show()